# import libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Activation, Dropout, Dense, GlobalMaxPooling2D
import os
import matplotlib.pyplot as plt
import seaborn as sns
import PIL
import PIL.Image
from sklearn.metrics import confusion_matrix
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# Dataset root; each split (train/valid/test) lives in its own sub-directory.
path = 'data'
train, valid, test = (os.path.join(path, split) for split in ('train', 'valid', 'test'))
# Scale pixel values from [0, 255] to [0, 1]. The original's explicit
# rotation_range=0 / shear_range=0 / zoom_range=0 / flips=False on the
# training generator are the library defaults, so they are omitted here;
# add augmentation arguments to the training generator if needed.
_rescale = 1. / 255
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=_rescale)
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=_rescale)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=_rescale)

# Common generator settings.
target_size = (224, 224)  # matches the ResNet50 input resolution used below
batch_size = 32

# Wrap each split directory in a batched generator. Labels come from the
# sub-directory names; class_mode='binary' yields a single 0/1 label per image.
train_data_gen = train_datagen.flow_from_directory(
    train,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='binary')
# Validation and test are NOT shuffled so predictions later line up
# index-for-index with generator.classes.
valid_data_gen = valid_datagen.flow_from_directory(
    valid,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False)
test_data_gen = test_datagen.flow_from_directory(
    test,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False)
# Found 350 images belonging to 2 classes.
# Found 100 images belonging to 2 classes.
# Found 50 images belonging to 2 classes.
# Invert Keras' {class_name: index} mapping into {index: class_name}
# so predicted indices can be translated back to readable names.
class_labels = {index: name for name, index in train_data_gen.class_indices.items()}
class_labels
# {0: 'ColonACA', 1: 'LungACA'}
# visualize batch of images
sample_training_images, labels = next(train_data_gen)

def plotImages(images_arr, labels):
    """Plot up to 16 images on a 4x4 grid, titled with their class names.

    images_arr: iterable of HxWxC float images in [0, 1]
    labels:     parallel iterable of 0/1 labels (float or int)
    """
    fig, axes = plt.subplots(4, 4, figsize=(15, 15))
    axes = axes.flatten()
    # Hide every axis up front so that, when fewer than 16 images are given,
    # the unused grid cells stay blank instead of showing empty framed plots.
    for ax in axes:
        ax.axis('off')
    for img, lbl, ax in zip(images_arr, labels, axes):
        ax.imshow(img)
        # The generator yields float labels (0.0/1.0) while class_labels is
        # keyed by ints — cast explicitly instead of relying on float hashing.
        ax.set_title(class_labels[int(lbl)])
    plt.tight_layout()
    plt.show()

plotImages(sample_training_images[:16], labels[:16])
# Transfer learning: use the ResNet50 convolutional base pre-trained on
# ImageNet, without its original classification head.
input_shape = (224, 224, 3)
base_model = tf.keras.applications.ResNet50(
    weights="imagenet",
    include_top=False,
    input_shape=input_shape,
)
# Freeze the pre-trained weights; only the new head will train.
base_model.trainable = False

# Stack a small binary-classification head on top of the frozen base.
model = Sequential([
    base_model,
    GlobalMaxPooling2D(),   # (7, 7, 2048) feature map -> 2048-vector
    Dropout(0.2),           # light regularization for the new head
    Dense(1),
    Activation('sigmoid'),  # probability of the positive class
])
model.summary()
# Model: "sequential_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= resnet50 (Functional) (None, 7, 7, 2048) 23587712 _________________________________________________________________ global_max_pooling2d_1 (Glob (None, 2048) 0 _________________________________________________________________ dropout_1 (Dropout) (None, 2048) 0 _________________________________________________________________ dense_1 (Dense) (None, 1) 2049 _________________________________________________________________ activation_1 (Activation) (None, 1) 0 ================================================================= Total params: 23,589,761 Trainable params: 2,049 Non-trainable params: 23,587,712 _________________________________________________________________
# Training configuration: Adam with default settings and binary cross-entropy,
# matching the single-sigmoid output of the head.
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.BinaryCrossentropy()
model.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)

# Train the new head for a fixed number of passes over the data.
epochs = 10
model.fit(train_data_gen, epochs=epochs, validation_data=valid_data_gen)
# Epoch 1/10 11/11 [==============================] - 15s 1s/step - loss: 0.5172 - binary_accuracy: 0.7429 - val_loss: 0.4149 - val_binary_accuracy: 0.8700 Epoch 2/10 11/11 [==============================] - 13s 1s/step - loss: 0.4461 - binary_accuracy: 0.7943 - val_loss: 0.3874 - val_binary_accuracy: 0.8900 Epoch 3/10 11/11 [==============================] - 13s 1s/step - loss: 0.4547 - binary_accuracy: 0.7857 - val_loss: 0.3755 - val_binary_accuracy: 0.9100 Epoch 4/10 11/11 [==============================] - 13s 1s/step - loss: 0.4713 - binary_accuracy: 0.7857 - val_loss: 0.3596 - val_binary_accuracy: 0.9000 Epoch 5/10 11/11 [==============================] - 13s 1s/step - loss: 0.4137 - binary_accuracy: 0.8029 - val_loss: 0.3396 - val_binary_accuracy: 0.9300 Epoch 6/10 11/11 [==============================] - 13s 1s/step - loss: 0.4111 - binary_accuracy: 0.8200 - val_loss: 0.3281 - val_binary_accuracy: 0.9200 Epoch 7/10 11/11 [==============================] - 13s 1s/step - loss: 0.3998 - binary_accuracy: 0.8371 - val_loss: 0.3200 - val_binary_accuracy: 0.9300 Epoch 8/10 11/11 [==============================] - 13s 1s/step - loss: 0.3893 - binary_accuracy: 0.8086 - val_loss: 0.3555 - val_binary_accuracy: 0.8300 Epoch 9/10 11/11 [==============================] - 13s 1s/step - loss: 0.3559 - binary_accuracy: 0.8600 - val_loss: 0.2997 - val_binary_accuracy: 0.9300 Epoch 10/10 11/11 [==============================] - 13s 1s/step - loss: 0.3514 - binary_accuracy: 0.8743 - val_loss: 0.2939 - val_binary_accuracy: 0.9400
# <tensorflow.python.keras.callbacks.History at 0x7fd9b2fd2430>
# Round-trip the trained model through disk in the (legacy) HDF5 format,
# proving it can be fully restored from the saved file.
model_path = 'my_model.h5'
model.save(model_path)        # creates my_model.h5
del model                     # drop the in-memory copy
model = load_model(model_path)  # restore from the saved file
# Make predictions on the unseen test set.
predict_prob = model.predict(test_data_gen)  # (n_samples, 1) sigmoid outputs
# Threshold at 0.5 and flatten to a 1-D 0/1 vector: sklearn's
# confusion_matrix expects 1-D label arrays, not an (n, 1) boolean column.
predictions = (predict_prob > 0.5).astype(int).ravel()
# Rows = actual class, columns = predicted class. Comparing against
# test_data_gen.classes is valid only because the test generator was
# created with shuffle=False.
cm = confusion_matrix(test_data_gen.classes, predictions)
cm
# array([[22, 3],
#        [ 0, 25]])
# Derive diagnostic metrics from the 2x2 confusion matrix.
cm = confusion_matrix(test_data_gen.classes, predictions)
# ravel() of a 2x2 matrix yields [TN, FP, FN, TP] row by row.
TN, FP, FN, TP = cm.ravel()
sensitivity = (TP/(TP + FN))*100  # true-positive rate (recall)
specificity = (TN/(TN + FP))*100  # true-negative rate
PPV = (TP/(TP + FP))*100          # precision for the positive class
NPV = (TN/(TN + FN))*100          # precision for the negative class
print(f'Sensitivity: {sensitivity: .2f}%')
print(f'Specificity: {specificity: .2f}%')
print(f'Positive predictive value:{PPV: .2f}%')
print(f'Negative predictive value:{NPV: .2f}%')
# Sensitivity: 100.00% Specificity: 88.00% Positive predictive value: 89.29% Negative predictive value: 100.00%
# Render the confusion matrix as an annotated heatmap.
axis_names = ['Colon Adenocarcinoma', 'Lung Adenocarcinoma']
df_cm = (pd.DataFrame(cm, index=axis_names, columns=axis_names)
           .rename_axis(index='Actual', columns='Predicted'))
plt.figure(figsize=(15, 10))
sns.set(font_scale=1.5)
sns.heatmap(df_cm, cmap="Blues", annot=True, annot_kws={"size": 20})
# <AxesSubplot:xlabel='Predicted', ylabel='Actual'>
# Make a prediction on a single image.
image_path = 'data/test/ColonACA/Image084.jpeg'
class_names = ['Colon Adenocarcinoma', 'Lung Adenocarcinoma']
img = tf.keras.preprocessing.image.load_img(
    image_path, target_size=(224, 224)
)
img_array = tf.keras.preprocessing.image.img_to_array(img)
# BUG FIX: the generators rescaled all training/validation/test images by
# 1/255, so a single image must be normalized the same way before inference;
# the original fed raw 0-255 pixels, inconsistent with training.
img_array = img_array / 255.0
img_array = tf.expand_dims(img_array, 0)  # create a batch of one
predictions = model.predict(img_array)
# Sigmoid output < 0.5 -> class 0 (Colon), otherwise class 1 (Lung);
# use class_names instead of repeating the hard-coded strings.
if predictions[0][0] < 0.5:
    print(f'This image most likely belongs to {class_names[0]}')
else:
    print(f'This image most likely belongs to {class_names[1]}')
print(predictions[0][0])
PIL.Image.open(image_path)
# This image most likely belongs to Colon Adenocarcinoma 0.2389245
# Make a prediction on a single image.
image_path = 'data/test/LungACA/Image056.jpeg'
class_names = ['Colon Adenocarcinoma', 'Lung Adenocarcinoma']
img = tf.keras.preprocessing.image.load_img(
    image_path, target_size=(224, 224)
)
img_array = tf.keras.preprocessing.image.img_to_array(img)
# BUG FIX: the generators rescaled all training/validation/test images by
# 1/255, so a single image must be normalized the same way before inference;
# the original fed raw 0-255 pixels, inconsistent with training.
img_array = img_array / 255.0
img_array = tf.expand_dims(img_array, 0)  # create a batch of one
predictions = model.predict(img_array)
# Sigmoid output < 0.5 -> class 0 (Colon), otherwise class 1 (Lung);
# use class_names instead of repeating the hard-coded strings.
if predictions[0][0] < 0.5:
    print(f'This image most likely belongs to {class_names[0]}')
else:
    print(f'This image most likely belongs to {class_names[1]}')
print(predictions[0][0])
PIL.Image.open(image_path)
# This image most likely belongs to Lung Adenocarcinoma 0.97228056